Introduction
Unidentified Aerial Phenomena (UAPs), often referred to as UFOs, have captivated public imagination for decades. While their existence remains scientifically unconfirmed, recent increases in reported sightings have driven renewed interest in understanding these enigmatic occurrences. This project aims to leverage the power of machine learning to classify and analyze UAP data, potentially shedding light on their characteristics and possible explanations.
Methodology:
We will combine data analysis with machine learning techniques to achieve two key objectives:
Evaluation and Interpretation
Our models' performance will be evaluated using standard metrics like accuracy, precision, and recall. We will carefully analyze the results to ensure their reliability and interpretability. Feature importance analysis will reveal which attributes contribute most to the classification, potentially offering insights into the characteristics of different UAP categories.
Limitations and Ethical Considerations
We acknowledge the inherent limitations of UAP data, including potential biases, inconsistencies, and subjective interpretations. We will address these limitations through careful data cleaning, feature selection, and transparent reporting of our methodology and results. Additionally, we will be mindful of the ethical implications of our research, avoiding sensationalism or unsubstantiated claims that could contribute to misinformation or stigmatization.
Expected Outcomes
This project aims to contribute to the growing body of knowledge on UAPs by:
Data Visualization and Storytelling
Finally, we will leverage impactful visualization techniques like interactive maps, charts, and graphs to effectively communicate our findings and tell a compelling data-driven story about UAPs. This presentation will engage the audience, promote understanding, and encourage further exploration of these enigmatic phenomena.
Further Steps:
Following this initial proposal, we plan to:
import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil
CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'ufo-sightings:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F388%2F793053%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240313%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240313T114601Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Da02132ff24fd235bd04bf2cf6a4be11ed3ff06009405344f8f3b234b1d5f33112752de599b5661e7abc6800401124a45cce2ee4ab5bed65ebd287d1b13eeee009165d04af02126cfcc660fcb16c815cfc4a78c7fce3dae57baef9d00043eaa1932f4d3c048fee914ac800b5a83f73e68ccd0c2a340cb13efe4cb18b8f7f77400d06984b97bd7fe33b8b12f335f005093a44f3963bf65aeabd97ab5dada55de980bacca39d684643bc46128079e43fc34c7bfd0f7f101dc099a84e3eca70c53d64f11fca6320afc53f9cdd47097c941eeeca6b7c0fcbeeea9277a34011ae83ca8505f4467f8a31a955a4daea5c9bfc7551cdae6b158555e48f98a8b36471d4a89'
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'
!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)
try:
os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
pass
try:
os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
pass
# Download every "<directory>:<url-encoded URL>" entry of DATA_SOURCE_MAPPING
# and unpack the archive into KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first colon only; the rest of the entry is the encoded URL.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header lookup yields None when Content-Length is absent;
            # the progress bar then stays empty instead of crashing on int(None).
            expected_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / expected_bytes)) if expected_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # tarfile reopens the file by name, so buffered bytes must reach disk first.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` would
                # overwrite the module and break the next loop iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is an OSError subclass, so it has to be handled first.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')
Downloading ufo-sightings, 10712686 bytes compressed [==================================================] 10712686 bytes downloaded Downloaded and uncompressed: ufo-sightings Data source import complete.
Double-click (or enter) to edit
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
import plotly.express as px
import re
import spacy
import pandas as pd
from IPython.display import display
import folium
from folium.plugins import MarkerCluster
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from IPython.display import IFrame
import sys
import warnings
# Ignore all warnings unless sys.warnoptions is non-empty.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# Load the dataset, skipping malformed lines.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which is what triggers the mixed-type
# DtypeWarning for columns 5 and 9.
ufo = pd.read_csv('/kaggle/input/ufo-sightings/complete.csv', on_bad_lines='skip', low_memory=False)
<ipython-input-3-687a5194a777>:2: DtypeWarning: Columns (5,9) have mixed types. Specify dtype option on import or set low_memory=False.
ufo = pd.read_csv('/kaggle/input/ufo-sightings/complete.csv', on_bad_lines='skip')
# Preview the first rows of the dataset
ufo.head()
# Inspect the distinct free-text comments
ufo['comments'].unique()
array(['This event took place in early fall around 1949-50. It occurred after a Boy Scout meeting in the Baptist Church. The Baptist Church sit',
'1949 Lackland AFB, TX. Lights racing across the sky & making 90 degree turns on a dime.',
'Green/Orange circular disc over Chester, England', ...,
'2 witnesses 2 miles apart, Red & White Elongated-Cigar Shaped Flashing lights, NW of Oklahoma City',
'On September ninth my wife and i noticed strange lights in the sky. Two white lights and a red light. I had noticed this the night befo',
'Hovering object lit with red and white lights, no engine sounds, no directional movement.'],
dtype=object)
#Check the shape of the dataset
# Dimensions of the dataset as (rows, columns)
ufo.shape
(88679, 11)
# Check the basic information of the dataset (dtypes and non-null counts)
ufo.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 88679 entries, 0 to 88678 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 datetime 88679 non-null object 1 city 88679 non-null object 2 state 81270 non-null object 3 country 76314 non-null object 4 shape 85757 non-null object 5 duration (seconds) 88677 non-null object 6 duration (hours/min) 85660 non-null object 7 comments 88644 non-null object 8 date posted 88679 non-null object 9 latitude 88679 non-null object 10 longitude 88679 non-null float64 dtypes: float64(1), object(10) memory usage: 7.4+ MB
# Count the missing values in every column
ufo.isnull().sum()
datetime 0 city 0 state 7409 country 12365 shape 2922 duration (seconds) 2 duration (hours/min) 3019 comments 35 date posted 0 latitude 0 longitude 0 dtype: int64
# Drop every row that has at least one missing value
ufo.dropna(inplace=True)
# Rename columns so they are valid, space-free identifiers
ufo = ufo.rename(
    columns={
        'duration (seconds)': 'duration_second',
        'duration (hours/min)': 'duration_min_hours',
        'date posted': 'date_posted',
    }
)
# Convert the 'datetime' and 'date_posted' columns to datetime format.
# NOTE(review): the unparseable 'datetime' rows appear to be midnight written
# as "24:00" (confirm against the raw file). A parsed Timestamp never has
# hour 24, so the text is rewritten to "00:00" of the same day before
# parsing; anything else that still fails is coerced to NaT.
ufo['datetime'] = pd.to_datetime(
    ufo['datetime'].astype(str).str.replace(' 24:00', ' 00:00', regex=False),
    errors='coerce',
)
ufo['date_posted'] = pd.to_datetime(ufo['date_posted'], errors='coerce')
# Check NaN values again after the conversion
ufo.isnull().sum()
datetime 546 city 0 state 0 country 0 shape 0 duration_second 0 duration_min_hours 0 comments 0 date_posted 0 latitude 0 longitude 0 dtype: int64
# Extract date/time components from 'datetime' and 'date_posted' into new columns.
# Rows whose 'datetime' is NaT are removed first: their components would be
# NaN, which cannot be cast to int. (The former `hour == 24` rewrite was dead
# code, because a parsed Timestamp's hour is always 0-23.)
ufo.dropna(subset=['datetime'], inplace=True)

# column-name suffix -> attribute of the .dt accessor
components = {'month': 'month', 'day': 'day', 'year': 'year', 'hour': 'hour', 'min': 'minute'}
for suffix, attribute in components.items():
    ufo[f'datetime_{suffix}'] = getattr(ufo['datetime'].dt, attribute).astype(int)
for suffix, attribute in components.items():
    ufo[f'date_posted_{suffix}'] = getattr(ufo['date_posted'].dt, attribute)

# Drop the original 'datetime' and 'date_posted' columns
ufo = ufo.drop(columns=['date_posted', 'datetime'])
# Create a new column 'comments_len' containing the number of whitespace-separated
# words in the 'comments' column (non-string values count as 0 words).
# The 'comments' column itself is kept in the frame.
ufo['comments_len'] = ufo['comments'].map(lambda text: len(text.split()) if isinstance(text, str) else 0)
# Helpers for turning the free-text duration into a number of minutes.

# One quantity: a fraction ("1/2"), a mixed number ("1 1/2") or a plain or
# decimal number ("45", "1.5"), optionally followed by a unit word.
_QUANTITY_RE = re.compile(
    r'(?:(?:(?P<whole>\d+)\s+)?(?P<num>\d+)\s*/\s*(?P<den>\d+)'
    r'|(?P<dec>\d+(?:\.\d+)?))'
    r'\s*(?P<unit>[a-z]+)?'
)


def _unit_factor(word):
    """Return the minutes-per-unit factor for a unit word, or None if unknown."""
    if not word:
        return None
    if word.startswith('sec'):
        return 1 / 60
    if word.startswith('min'):
        return 1.0
    if word.startswith(('hour', 'hr')):
        return 60.0
    return None


def extract_minutes(comment):
    """Convert a free-text duration such as "1/2 hour" into minutes.

    Rules:
      * one quantity           -> that quantity ("45 minutes" -> 45)
      * two, differing units   -> their sum ("1 hour 30 minutes" -> 90)
      * two, otherwise         -> their average, read as a range ("2-3 minutes" -> 2.5)
      * none, or more than two -> None
    Quantities with no recognised unit (seconds, minutes, hours) are taken
    as-is, i.e. treated as minutes.

    Args:
        comment: the duration text; may be None/NaN.

    Returns:
        The duration in minutes as a float, or None when it cannot be derived.
    """
    if pd.isnull(comment):
        return None

    quantities = []
    for match in _QUANTITY_RE.finditer(str(comment).lower()):
        if match.group('dec') is not None:
            value = float(match.group('dec'))
        else:
            denominator = int(match.group('den'))
            if denominator == 0:
                return None
            value = int(match.group('whole') or 0) + int(match.group('num')) / denominator
        quantities.append((value, _unit_factor(match.group('unit'))))

    if len(quantities) == 1:
        value, factor = quantities[0]
        return value * (factor or 1.0)
    if len(quantities) == 2:
        (first, first_factor), (second, second_factor) = quantities
        if first_factor is not None and second_factor is not None and first_factor != second_factor:
            # Compound duration such as "1 hour 30 minutes".
            return first * first_factor + second * second_factor
        # Range such as "2-3 minutes": the unit usually follows the second number only.
        return (first + second) / 2 * (first_factor or second_factor or 1.0)
    return None
# Derive 'duration_minutes' from the free-text duration and show both side by side
ufo['duration_minutes'] = ufo['duration_min_hours'].apply(extract_minutes)
print(ufo[['duration_min_hours', 'duration_minutes']])
# Drop the original column
ufo.drop(columns='duration_min_hours', inplace=True)
duration_min_hours duration_minutes 0 45 minutes 45.0 3 1/2 hour 1.5 4 15 minutes 15.0 5 5 minutes 5.0 7 20 minutes 20.0 ... ... ... 88674 hour NaN 88675 5 seconds 5.0 88676 17 minutes 17.0 88677 2 nights 2.0 88678 still occuring NaN [68982 rows x 2 columns]
# Drop the rows that contain any NaN value
ufo.dropna(inplace=True)
# Group the raw values of the 'shape' column into broader categories
shape = {
    'changing': ['changed', 'changing'],
    'chevron': ['chevron', 'crescent'],
    'cylinder': ['cigar', 'cylinder'],
    'round': ['circle', 'disk', 'egg', 'hexagon', 'oval', 'round', 'sphere'],
    'square': ['cross', 'diamond', 'rectangle'],
    'triangle': ['cone', 'delta', 'pyramid', 'triangle'],
    'light': ['fireball', 'flare', 'flash', 'light'],
    'otherSeen': ['other'],
    'otherSeen2': ['dome', 'formation', 'teardrop', 'unknown'],
}
# Invert to {raw value: category} so one vectorised map replaces a .loc
# assignment per category; raw values missing from the mapping become NaN.
shape_lookup = {raw: category for category, raw_values in shape.items() for raw in raw_values}
ufo['ufo_shape'] = ufo['shape'].map(shape_lookup)
# Drop original column
ufo.drop(columns='shape', inplace=True)
# Check the dataset
ufo.head()
# Convert the columns that were read as object to float
for numeric_column in ('latitude', 'duration_second'):
    ufo[numeric_column] = ufo[numeric_column].astype('float64')
# Names of the columns that still have dtype object
object_col = [x for x in ufo.columns if ufo[x].dtype == 'object']
Double-click (or enter) to edit
# Summary statistics of the dataset
ufo.describe().T
# Correlation between the numerical columns
plt.figure(figsize=(12, 5))
numeric_columns = ufo.select_dtypes(include=['int64', 'float64'])
sns.heatmap(numeric_columns.corr(), annot=True)
plt.title('Correlation with Numerical Columns')
plt.show()
# Plot histograms of columns
ufo.hist(figsize=(15, 15))
plt.show()
# Box plots for visualising outliers
def boxplots_custom(dataset, columns_list, rows, cols, suptitle, size=(20, 10)):
    """Draw one vertical box plot per column on a rows x cols grid of axes.

    Args:
        dataset: table indexed by column name (``dataset[name]`` is plotted).
        columns_list: names of the columns to plot, one per grid cell.
        rows: number of grid rows.
        cols: number of grid columns.
        suptitle: title placed above the whole figure.
        size: figure size passed to ``plt.subplots`` as ``figsize``.

    Raises:
        ValueError: if the grid has fewer cells than there are columns to plot.
    """
    if len(columns_list) > rows * cols:
        raise ValueError(
            f'{len(columns_list)} columns do not fit into a {rows}x{cols} grid'
        )
    # squeeze=False always yields a 2-D array of axes, so flatten() also
    # works for single-row, single-column and 1x1 grids.
    fig, axs = plt.subplots(rows, cols, figsize=size, squeeze=False)
    fig.suptitle(suptitle, y=0.93, size=16)
    axs = axs.flatten()
    for i, column_name in enumerate(columns_list):
        # Label the y axis only on the first plot of each grid row.
        if i % cols == 0:
            axs[i].set_ylabel('Values')
        sns.boxplot(data=dataset[column_name], orient='v', ax=axs[i])
        axs[i].set_title(column_name)
    # Hide the grid cells that received no plot.
    for unused_ax in axs[len(columns_list):]:
        unused_ax.set_visible(False)


boxplots_custom(
    dataset=ufo,
    columns_list=ufo.select_dtypes(include=('int64', 'float64')).columns.tolist(),
    rows=4,
    cols=6,
    suptitle='We Visualize if anomalies persist in columns',
)
# Remove outliers of 2 columns: keep only rows strictly below each upper bound
for column_name, upper_bound in (('duration_second', 50000000.0), ('duration_minutes', 200000)):
    print(f'Numbers rows before deleting outliers [{column_name}]: {len(ufo)}')
    ufo = ufo[ufo[column_name] < upper_bound]
    print(f'Numbers rows after deleting outliers [{column_name}]: {len(ufo)}')
Numbers rows before deleting outliers [duration_second]: 63448 Numbers rows after deleting outliers [duration_second]: 63444 Numbers rows before deleting outliers [duration_minutes]: 63444 Numbers rows after deleting outliers [duration_minutes]: 63442
# Sighting year by state, one line per country
plt.figure(figsize=(16, 6))
sns.lineplot(x=ufo['state'].str.upper(), y=ufo['datetime_year'], data=ufo, ci=None, hue='country')
plt.title('Visualisation UFO by State/Country')
plt.xlabel('State')
plt.ylabel('Year')
plt.xticks(rotation=45)
# Extend the y axis 30 years past the latest sighting year
plt.ylim(1960, ufo['datetime_year'].max()+30)
plt.show()
# Visualizing the most frequently observed UFO shape
plt.figure(figsize=(10, 6))
sns.countplot(x='ufo_shape', data=ufo, palette='viridis')
plt.xlabel('UFO Shape')
plt.ylabel('Count')
plt.title('Types of UFO Seen')
plt.show()
# Number of sightings per grouped shape
ufo['ufo_shape'].value_counts()
light 19687 round 18130 otherSeen2 7097 triangle 6815 otherSeen 4458 cylinder 2667 square 2197 changing 1566 chevron 825 Name: ufo_shape, dtype: int64
# Label every sighting as 'Day' (hour 6 to 18 inclusive) or 'Night' (any other hour)
sighting_hour = ufo['datetime_hour']
ufo['time_of_day'] = ''
ufo.loc[sighting_hour.between(6, 18), 'time_of_day'] = 'Day'
ufo.loc[(sighting_hour > 18) | (sighting_hour < 6), 'time_of_day'] = 'Night'
obsev_by_time_of_day = ufo['time_of_day'].value_counts()

# Label every sighting with the season of its month; months outside 1-12 get ''
season_by_month = {
    3: 'spring', 4: 'spring', 5: 'spring',
    6: 'summer', 7: 'summer', 8: 'summer',
    9: 'autumn', 10: 'autumn', 11: 'autumn',
    12: 'winter', 1: 'winter', 2: 'winter',
}
ufo['datetime_season'] = ufo['datetime_month'].map(season_by_month).fillna('')
obsev_by_time_of_season = ufo['datetime_season'].value_counts()

print(obsev_by_time_of_day)
print(obsev_by_time_of_season)
Night 47736 Day 15706 Name: time_of_day, dtype: int64 summer 20666 autumn 17513 spring 12759 winter 12504 Name: datetime_season, dtype: int64
# Observation by season: which type of UFO was seen
plt.figure(figsize=(14, 6))
# The palette is passed to the plot call itself: calling sns.color_palette()
# after plotting only returns a palette and does not recolour anything.
sns.countplot(
    data=ufo,
    x='datetime_season',
    hue='ufo_shape',
    order=ufo['datetime_season'].value_counts().index,
    palette='magma',
)
plt.title('When and what type of UFO was observed at Season')
# The x axis shows seasons, not the time of day
plt.xlabel('Season')
plt.ylabel('Number')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
# Observation by time of day: which type of UFO was seen
plt.figure(figsize=(14, 6))
# The palette is passed to the plot call itself: calling sns.color_palette()
# after plotting only returns a palette and does not recolour anything.
sns.countplot(
    data=ufo,
    x='ufo_shape',
    hue='time_of_day',
    order=ufo['ufo_shape'].value_counts().index,
    palette='rocket',
)
plt.title('When and what type of UFO was observed during the day')
plt.xlabel('Shape UFO/Time of Day')
plt.ylabel('Number')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
# Horizontal histogram of the comment length (in words), split by UFO shape
plt.figure(figsize=(12, 6))
sns.set_theme(style="whitegrid")
sns.histplot(
    y=ufo['comments_len'],
    bins=20,
    kde=False,
    color='blue',
    hue=ufo['ufo_shape'],
    element="step",
    stat="count",
    common_norm=False,
)
plt.ylabel("Comments Length")
plt.xlabel("Number of Observations")
plt.title("Distribution of Comments Length about UFO")
plt.show()
# Use spaCy to scan the sighting comments for location nouns, emotion nouns
# and UFO-context entities, collecting and printing every hit.
nlp = spacy.load("en_core_web_sm")
general_nouns = ['location', 'area', 'region', 'territory', 'space', 'place',
                 'spot', 'site', 'beach']
specific_nouns = ['field', 'desert', 'mountain', 'forest', 'city', 'village',
                  'ocean', 'river', 'lake', 'surroundings', 'countryside',
                  'landscape']
ufo_context_terms = ['aviation', 'restricted', 'high altitude', 'airport', 'air',
                     'ufo sighting location', 'hotspot', 'military', 'base',
                     'government', 'defense', 'center', 'secure', 'strategic',
                     'high security', 'security', 'intelligence headquarters',
                     'pentagon', 'white house']
emotions = ['amazement', 'wonder', 'fear', 'excitement', 'distrust', 'hatred',
            'displeasure', 'misunderstanding', 'concern', 'hope', 'delight',
            'curiosity', 'helplessness', 'happiness', 'distrust', 'panic',
            'aversion', 'worry', 'awe', 'confusion',
            'love', 'gratitude', 'joy', 'contentment', 'grace', 'serenity',
            'euphoria', 'optimism', 'pride', 'satisfaction', 'compassion',
            'anger', 'disgust', 'despair', 'grief', 'guilt', 'shame',
            'anxiety', 'regret', 'sadness', 'loneliness', 'frustration']
comments = ufo['comments'].astype(object)
chunk_lst_general = []
chunk_lst_specific = []
entity_lst = []
emotions_lst = []

# Every vocabulary entry is lower case, so one lower-cased membership test is
# equivalent to checking the upper, lower and original spelling separately.
general_lookup = set(general_nouns)
specific_lookup = set(specific_nouns)
emotion_lookup = set(emotions)
context_lookup = set(ufo_context_terms)

# nlp.pipe streams the texts through the pipeline in batches and yields the
# documents in input order.
for doc in nlp.pipe(comments):
    for chunk in doc.noun_chunks:
        chunk_key = chunk.text.lower()
        if chunk_key in general_lookup:
            chunk_lst_general.append(chunk.text)
            print(f"General noun for locations: {chunk.text}")
        elif chunk_key in specific_lookup:
            chunk_lst_specific.append(chunk.text)
            print(f"Specific noun for locations: {chunk.text}")
        elif chunk_key in emotion_lookup:
            emotions_lst.append(chunk.text)
            print(f"Emotions noun for locations: {chunk.text}")
    for entity in doc.ents:
        if entity.text.lower() in context_lookup:
            entity_lst.append(entity.text)
            print(f"UFO context-specific term: {entity.text}")
General noun for locations: place Specific noun for locations: city General noun for locations: Area General noun for locations: space General noun for locations: location Specific noun for locations: ocean Specific noun for locations: city Specific noun for locations: field General noun for locations: space General noun for locations: place Specific noun for locations: field Specific noun for locations: field General noun for locations: space Specific noun for locations: ocean General noun for locations: area Specific noun for locations: field Specific noun for locations: lake Specific noun for locations: field Specific noun for locations: lake Specific noun for locations: river Specific noun for locations: city Specific noun for locations: lake Specific noun for locations: ocean General noun for locations: space General noun for locations: place General noun for locations: place General noun for locations: space General noun for locations: area Specific noun for locations: ocean Specific noun for locations: lake Specific noun for locations: field Specific noun for locations: field General noun for locations: space Specific noun for locations: field Specific noun for locations: city Specific noun for locations: field Specific noun for locations: field Specific noun for locations: field General noun for locations: area General noun for locations: area Specific noun for locations: field General noun for locations: space General noun for locations: Space General noun for locations: area General noun for locations: space Specific noun for locations: Lake General noun for locations: site Specific noun for locations: city Specific noun for locations: mountain General noun for locations: place Specific noun for locations: ocean General noun for locations: space General noun for locations: space Specific noun for locations: city Specific noun for locations: field Specific noun for locations: Lake Specific noun for locations: river Specific noun for locations: river 
Specific noun for locations: ocean Specific noun for locations: city Specific noun for locations: city Specific noun for locations: lake General noun for locations: area Specific noun for locations: field UFO context-specific term: PENTAGON Specific noun for locations: surroundings Specific noun for locations: ocean Specific noun for locations: ocean General noun for locations: space UFO context-specific term: Military Emotions noun for locations: concern Specific noun for locations: ocean General noun for locations: place General noun for locations: space General noun for locations: area General noun for locations: space General noun for locations: SPACE General noun for locations: Location Specific noun for locations: field General noun for locations: place General noun for locations: beach Specific noun for locations: ocean Specific noun for locations: landscape General noun for locations: space Specific noun for locations: city General noun for locations: beach Specific noun for locations: ocean General noun for locations: space Specific noun for locations: field General noun for locations: space Specific noun for locations: forest General noun for locations: place Specific noun for locations: city General noun for locations: space General noun for locations: place General noun for locations: space General noun for locations: place Specific noun for locations: City Specific noun for locations: field Specific noun for locations: ocean Specific noun for locations: lake Specific noun for locations: mountain Specific noun for locations: city General noun for locations: area Specific noun for locations: desert Specific noun for locations: ocean General noun for locations: place Specific noun for locations: Lake Specific noun for locations: ocean Specific noun for locations: field Specific noun for locations: city General noun for locations: space General noun for locations: place Specific noun for locations: field Specific noun for locations: mountain General noun 
for locations: area Specific noun for locations: lake Specific noun for locations: Lake General noun for locations: place General noun for locations: place Specific noun for locations: field General noun for locations: place General noun for locations: space Specific noun for locations: city Specific noun for locations: ocean Specific noun for locations: river General noun for locations: SPACE General noun for locations: place Specific noun for locations: Ocean General noun for locations: space Emotions noun for locations: confusion Specific noun for locations: ocean General noun for locations: place General noun for locations: Place Specific noun for locations: desert Specific noun for locations: ocean Specific noun for locations: river Specific noun for locations: lake Specific noun for locations: field Specific noun for locations: city General noun for locations: space General noun for locations: space Specific noun for locations: ocean Specific noun for locations: field UFO context-specific term: pentagon General noun for locations: space Specific noun for locations: ocean UFO context-specific term: pentagon General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space Specific noun for locations: field Specific noun for locations: field Specific noun for locations: ocean General noun for locations: area General noun for 
locations: Area UFO context-specific term: Pentagon General noun for locations: space UFO context-specific term: pentagon General noun for locations: location General noun for locations: location Specific noun for locations: field General noun for locations: space Specific noun for locations: field General noun for locations: area General noun for locations: Spot Specific noun for locations: Ocean Specific noun for locations: city Specific noun for locations: lake Specific noun for locations: Lake Specific noun for locations: ocean General noun for locations: space General noun for locations: place Specific noun for locations: city Specific noun for locations: desert Specific noun for locations: ocean Specific noun for locations: mountain Specific noun for locations: Desert General noun for locations: place Specific noun for locations: lake Specific noun for locations: field Specific noun for locations: city Specific noun for locations: city Specific noun for locations: city Specific noun for locations: city Specific noun for locations: city Specific noun for locations: field General noun for locations: space Specific noun for locations: mountain Specific noun for locations: mountain Specific noun for locations: river General noun for locations: place Specific noun for locations: mountain Specific noun for locations: field Specific noun for locations: ocean General noun for locations: Area General noun for locations: area Specific noun for locations: ocean Specific noun for locations: city Specific noun for locations: lake Specific noun for locations: forest Specific noun for locations: forest Specific noun for locations: city General noun for locations: Spot Specific noun for locations: field General noun for locations: site Specific noun for locations: Mountain Specific noun for locations: city Specific noun for locations: ocean General noun for locations: location Specific noun for locations: mountain Specific noun for locations: ocean Specific noun for 
locations: mountain General noun for locations: SPACE Specific noun for locations: city Specific noun for locations: city General noun for locations: location General noun for locations: SPACE General noun for locations: spot General noun for locations: place General noun for locations: place General noun for locations: place General noun for locations: space General noun for locations: space General noun for locations: place General noun for locations: space General noun for locations: space General noun for locations: Location Specific noun for locations: city General noun for locations: Area General noun for locations: place General noun for locations: space General noun for locations: area Specific noun for locations: ocean General noun for locations: space General noun for locations: beach Specific noun for locations: city Specific noun for locations: ocean Specific noun for locations: field General noun for locations: space General noun for locations: space Specific noun for locations: ocean UFO context-specific term: White House Specific noun for locations: city Specific noun for locations: field General noun for locations: space General noun for locations: area General noun for locations: site General noun for locations: area Specific noun for locations: mountain General noun for locations: Area General noun for locations: place General noun for locations: area General noun for locations: site Specific noun for locations: ocean General noun for locations: place General noun for locations: place Specific noun for locations: ocean General noun for locations: area Specific noun for locations: field General noun for locations: SPACE UFO context-specific term: Airport Specific noun for locations: city Specific noun for locations: city Specific noun for locations: ocean Specific noun for locations: field General noun for locations: location General noun for locations: area Specific noun for locations: lake General noun for locations: beach Specific noun for 
locations: mountain Specific noun for locations: Lake General noun for locations: PLACE Specific noun for locations: ocean Specific noun for locations: mountain General noun for locations: place General noun for locations: Area General noun for locations: space Specific noun for locations: field Specific noun for locations: field Specific noun for locations: mountain General noun for locations: space Specific noun for locations: city General noun for locations: area General noun for locations: place UFO context-specific term: pentagon Emotions noun for locations: awe Specific noun for locations: city Specific noun for locations: city Specific noun for locations: city Specific noun for locations: city General noun for locations: place General noun for locations: place General noun for locations: area General noun for locations: space Specific noun for locations: lake Specific noun for locations: Lake Emotions noun for locations: panic General noun for locations: place General noun for locations: area Specific noun for locations: ocean Specific noun for locations: field Specific noun for locations: Field General noun for locations: Area General noun for locations: area General noun for locations: area General noun for locations: area Specific noun for locations: city General noun for locations: area General noun for locations: place Specific noun for locations: ocean General noun for locations: space General noun for locations: place General noun for locations: area Specific noun for locations: river General noun for locations: space General noun for locations: space General noun for locations: place General noun for locations: space Specific noun for locations: river General noun for locations: Site Specific noun for locations: CITY Specific noun for locations: lake Emotions noun for locations: fear General noun for locations: area General noun for locations: area Specific noun for locations: river General noun for locations: place General noun for locations: place 
Specific noun for locations: City General noun for locations: area General noun for locations: place General noun for locations: beach General noun for locations: place General noun for locations: area Specific noun for locations: lake Specific noun for locations: City Specific noun for locations: city Specific noun for locations: ocean General noun for locations: space General noun for locations: space Specific noun for locations: city General noun for locations: space General noun for locations: LOCATION Specific noun for locations: desert Specific noun for locations: lake UFO context-specific term: Airport Specific noun for locations: City General noun for locations: area General noun for locations: site General noun for locations: place General noun for locations: beach Specific noun for locations: Mountain General noun for locations: space General noun for locations: area Specific noun for locations: field General noun for locations: space Specific noun for locations: mountain General noun for locations: location Specific noun for locations: landscape Specific noun for locations: river Specific noun for locations: surroundings General noun for locations: space Specific noun for locations: ocean Specific noun for locations: city Specific noun for locations: city General noun for locations: place Emotions noun for locations: worry General noun for locations: place General noun for locations: place Specific noun for locations: ocean General noun for locations: site Specific noun for locations: field General noun for locations: space General noun for locations: space General noun for locations: space Specific noun for locations: city Specific noun for locations: ocean Specific noun for locations: ocean General noun for locations: area Specific noun for locations: ocean General noun for locations: Area General noun for locations: place General noun for locations: place General noun for locations: space Specific noun for locations: field Specific noun for locations: 
ocean Specific noun for locations: city General noun for locations: space General noun for locations: place General noun for locations: place General noun for locations: place General noun for locations: location General noun for locations: place Specific noun for locations: lake General noun for locations: space General noun for locations: place Specific noun for locations: field General noun for locations: place Specific noun for locations: city Specific noun for locations: city General noun for locations: area Specific noun for locations: mountain Specific noun for locations: lake General noun for locations: space Specific noun for locations: ocean General noun for locations: SPACE Specific noun for locations: ocean Specific noun for locations: field General noun for locations: site Specific noun for locations: ocean General noun for locations: space General noun for locations: space Specific noun for locations: ocean General noun for locations: site Specific noun for locations: field General noun for locations: space General noun for locations: space Specific noun for locations: field General noun for locations: space General noun for locations: space Specific noun for locations: ocean General noun for locations: place General noun for locations: space General noun for locations: Space Specific noun for locations: Desert Specific noun for locations: river General noun for locations: space Specific noun for locations: field Emotions noun for locations: awe Specific noun for locations: ocean General noun for locations: Area Specific noun for locations: field Specific noun for locations: ocean Specific noun for locations: city Specific noun for locations: ocean Specific noun for locations: desert General noun for locations: space General noun for locations: SPACE Specific noun for locations: lake Specific noun for locations: Forest General noun for locations: space Specific noun for locations: ocean Specific noun for locations: field General noun for locations: 
SPACE General noun for locations: space General noun for locations: area Emotions noun for locations: love General noun for locations: space General noun for locations: beach General noun for locations: place General noun for locations: place General noun for locations: area Emotions noun for locations: amazement General noun for locations: place Specific noun for locations: city Specific noun for locations: ocean Specific noun for locations: mountain General noun for locations: beach General noun for locations: space General noun for locations: area General noun for locations: place Specific noun for locations: City Specific noun for locations: city General noun for locations: space General noun for locations: place Specific noun for locations: ocean General noun for locations: beach Specific noun for locations: City General noun for locations: Area General noun for locations: space General noun for locations: area General noun for locations: area General noun for locations: area General noun for locations: space Specific noun for locations: desert General noun for locations: space Specific noun for locations: city General noun for locations: area General noun for locations: space General noun for locations: place General noun for locations: place General noun for locations: space General noun for locations: area Specific noun for locations: ocean General noun for locations: area Specific noun for locations: ocean General noun for locations: space Specific noun for locations: ocean Specific noun for locations: desert Specific noun for locations: Field Specific noun for locations: field Specific noun for locations: ocean General noun for locations: space Emotions noun for locations: awe Specific noun for locations: ocean Specific noun for locations: City Specific noun for locations: river General noun for locations: space General noun for locations: area Emotions noun for locations: panic General noun for locations: Location General noun for locations: PLACE 
General noun for locations: area Specific noun for locations: field Specific noun for locations: field Specific noun for locations: lake General noun for locations: space Specific noun for locations: city Specific noun for locations: city General noun for locations: place General noun for locations: space Specific noun for locations: desert General noun for locations: place General noun for locations: area Specific noun for locations: city Specific noun for locations: Desert General noun for locations: space Specific noun for locations: ocean Specific noun for locations: field General noun for locations: space Specific noun for locations: ocean Specific noun for locations: ocean General noun for locations: place General noun for locations: beach Specific noun for locations: ocean Specific noun for locations: city General noun for locations: beach Specific noun for locations: City Specific noun for locations: ocean General noun for locations: area Specific noun for locations: field Specific noun for locations: lake Specific noun for locations: field Specific noun for locations: ocean Specific noun for locations: lake General noun for locations: site General noun for locations: space Specific noun for locations: mountain UFO context-specific term: pentagon General noun for locations: area General noun for locations: beach General noun for locations: place Specific noun for locations: ocean Specific noun for locations: ocean General noun for locations: place General noun for locations: space Specific noun for locations: ocean General noun for locations: site General noun for locations: place Emotions noun for locations: Grace General noun for locations: space General noun for locations: space General noun for locations: space Specific noun for locations: ocean General noun for locations: place Specific noun for locations: ocean Specific noun for locations: field Specific noun for locations: ocean Specific noun for locations: ocean Specific noun for locations: lake 
General noun for locations: AREA General noun for locations: space Specific noun for locations: mountain General noun for locations: space General noun for locations: SPACE Specific noun for locations: Lake General noun for locations: space Specific noun for locations: lake Specific noun for locations: Ocean Specific noun for locations: Lake Specific noun for locations: field Specific noun for locations: field General noun for locations: area Specific noun for locations: field Specific noun for locations: city General noun for locations: space Specific noun for locations: lake General noun for locations: place General noun for locations: space General noun for locations: Location UFO context-specific term: pentagon General noun for locations: space Specific noun for locations: ocean General noun for locations: Location Specific noun for locations: ocean Specific noun for locations: field General noun for locations: beach Specific noun for locations: ocean Specific noun for locations: mountain General noun for locations: place Specific noun for locations: Lake UFO context-specific term: pentagon Specific noun for locations: mountain General noun for locations: space Specific noun for locations: lake Specific noun for locations: ocean General noun for locations: Beach Specific noun for locations: lake General noun for locations: beach General noun for locations: space Specific noun for locations: river General noun for locations: spot Emotions noun for locations: awe Specific noun for locations: mountain Specific noun for locations: lake General noun for locations: site Specific noun for locations: city Specific noun for locations: mountain General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: space General noun for locations: area Specific noun for locations: ocean Specific noun for locations: city Emotions noun for locations: awe Specific noun for locations: lake Specific noun for 
locations: ocean Specific noun for locations: lake General noun for locations: space General noun for locations: Beach Specific noun for locations: ocean General noun for locations: place General noun for locations: space General noun for locations: site Specific noun for locations: field General noun for locations: beach General noun for locations: space General noun for locations: space Specific noun for locations: river Specific noun for locations: mountain General noun for locations: beach General noun for locations: space Emotions noun for locations: joy General noun for locations: site Specific noun for locations: lake UFO context-specific term: Airport General noun for locations: place General noun for locations: space General noun for locations: area Specific noun for locations: city Specific noun for locations: Lake General noun for locations: beach Specific noun for locations: ocean General noun for locations: Area General noun for locations: space General noun for locations: space General noun for locations: site General noun for locations: space General noun for locations: space Specific noun for locations: Ocean General noun for locations: site General noun for locations: space Specific noun for locations: mountain General noun for locations: space Specific noun for locations: lake Specific noun for locations: river Specific noun for locations: lake General noun for locations: location Specific noun for locations: mountain Specific noun for locations: Field General noun for locations: area Specific noun for locations: ocean Specific noun for locations: ocean Specific noun for locations: field General noun for locations: beach General noun for locations: Location Specific noun for locations: ocean Specific noun for locations: ocean General noun for locations: place Specific noun for locations: city General noun for locations: space Specific noun for locations: mountain General noun for locations: space Specific noun for locations: ocean General noun for 
locations: BEACH Emotions noun for locations: awe Specific noun for locations: ocean Specific noun for locations: city Specific noun for locations: City General noun for locations: space Specific noun for locations: river Specific noun for locations: ocean Specific noun for locations: mountain General noun for locations: place Specific noun for locations: mountain Specific noun for locations: ocean General noun for locations: space Specific noun for locations: city Specific noun for locations: field General noun for locations: space Specific noun for locations: forest Emotions noun for locations: awe General noun for locations: space Specific noun for locations: lake Specific noun for locations: Lake General noun for locations: place General noun for locations: space General noun for locations: location General noun for locations: space General noun for locations: site General noun for locations: site General noun for locations: space General noun for locations: place Specific noun for locations: field Specific noun for locations: mountain General noun for locations: site General noun for locations: space Specific noun for locations: city General noun for locations: Beach General noun for locations: beach Specific noun for locations: Lake Specific noun for locations: field General noun for locations: beach General noun for locations: place Specific noun for locations: ocean General noun for locations: area General noun for locations: area General noun for locations: Location Specific noun for locations: city General noun for locations: place Specific noun for locations: ocean General noun for locations: space General noun for locations: space General noun for locations: area Specific noun for locations: field General noun for locations: space General noun for locations: space Specific noun for locations: Ocean Emotions noun for locations: AMAZEMENT General noun for locations: space Specific noun for locations: field Specific noun for locations: MOUNTAIN Specific 
noun for locations: lake General noun for locations: space General noun for locations: space
# Normalise every extracted word to lower case so that variants such as
# "City" / "city" are counted as the same label.
general_words = [word.lower() for word in chunk_lst_general]
specific_words = [word.lower() for word in chunk_lst_specific]
entity_words = [word.lower() for word in entity_lst]
emotion_words = [word.lower() for word in emotions_lst]

# Location vocabulary = general nouns + specific nouns + named entities
# (emotions are kept apart and get their own frame).
key_word = [*general_words, *specific_words, *entity_words]

# One single-column DataFrame per vocabulary, ready for plotting.
df_key_word = pd.DataFrame({'location': key_word})
df_emotions = pd.DataFrame({'emotion': emotion_words})
# Count plot of the location words, most frequent first.
custom_params = {"axes.spines.right": False, "axes.spines.top": False}
sns.set_theme(style="ticks", rc=custom_params)

# Bars are ordered by descending frequency of each location word.
location_order = df_key_word['location'].value_counts().index

plt.subplots(figsize=(15, 6))
sns.countplot(
    data=df_key_word,
    x="location",
    palette="vlag",
    order=location_order,
)
plt.xticks(rotation=45)
plt.title('Statistics on where UFO have been most frequently observed people')
plt.show()
# Number of occurrences of each emotion word.
df_emo = (
    df_emotions
    .groupby('emotion')
    .size()
    .reset_index(name='emotion_count')
)

# Share of each emotion in percent of all emotion words, rounded to one decimal.
total_emotions = df_emo['emotion_count'].sum()
df_emo['percentage_emotion'] = (df_emo['emotion_count'] * 100 / total_emotions).round(1)
# Horizontal bar chart: percentage of each emotion word found in the reports.
fig = plt.figure(figsize=(20, 4))
a = sns.barplot(
    data=df_emo,
    x='percentage_emotion',
    y='emotion',
    palette='mako',
    linestyle="-",
    linewidth=1,
    edgecolor="black",
)

plt.title("People's emotions when they see UFO", pad=15, size=25)
plt.xticks(size=13, color='black')
plt.yticks(size=13, color='black')
plt.xlabel('')
plt.ylabel('')

# Write the percentage just to the right of each bar (bar i sits at y == i).
for bar_pos, pct in enumerate(df_emo['percentage_emotion']):
    a.text(pct + 0.5, bar_pos, f"{pct}%", color='black', fontsize=16)

# Remove the frame entirely; the value labels make the axes lines redundant.
for side in ('right', 'top', 'left', 'bottom'):
    a.spines[side].set_visible(False)

plt.show()
# Sighting duration per country, with one bar per UFO shape inside each country.
country_upper = ufo['country'].str.upper()

plt.figure(figsize=(12, 5))
sns.barplot(
    x=country_upper,
    y=ufo['duration_second'],
    hue=ufo['ufo_shape'],
    palette='cubehelix',
    ci=None,  # no error bars
)
plt.title('We visualize how long the Event lasted')
plt.xlabel('Country')
plt.ylabel('Duration Visualisation UFO')
plt.xticks(rotation=45)
plt.legend(loc='upper right')
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()
# Work on a copy so the original `ufo` frame keeps its readable string values.
ufo_cop = ufo.copy()

# Names of all columns stored with the generic `object` dtype.
object_columns = ufo_cop.select_dtypes(include='object').columns.tolist()

# Replace each object column by integer codes. The same encoder instance is
# re-fitted for every column, so after the loop it only remembers the classes
# of the last column processed.
le = LabelEncoder()
for col in object_columns:
    ufo_cop[col] = le.fit_transform(ufo_cop[col])

# Quick look at the encoded dataset
ufo_cop.head(3)
# The two coordinates are the regression targets; every other column is a feature.
target_columns = ['latitude', 'longitude']

X = ufo_cop.drop(columns=target_columns)
y_latitude, y_longitude = (ufo_cop[name] for name in target_columns)
# Split the dataset into training and testing sets (80 % / 20 %, fixed seed).
# Both targets are split in the same call so their rows stay in step with X.
(
    X_train, X_test,
    y_train_latitude, y_test_latitude,
    y_train_longitude, y_test_longitude,
) = train_test_split(X, y_latitude, y_longitude, test_size=0.2, random_state=1)

# Scale the features to [0, 1].
# The scaler is fitted on the training rows only, so no statistics from the
# test rows leak into preprocessing, and the scaled values are written back to
# X_train / X_test, which are the matrices the models are trained and
# evaluated on. Wrapping the result in a DataFrame keeps the column names and
# the original row labels.
mms = MinMaxScaler()
X_train = pd.DataFrame(mms.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(mms.transform(X_test), columns=X_test.columns, index=X_test.index)

# Keep `X` as a scaled array as before, using the training-set scaling.
X = mms.transform(X)
# Initialize Random Forest Regressor models
# NOTE: everything between the triple quotes below is a bare string literal,
# so none of it is executed. It preserves a disabled hyper-parameter search:
# a 3-fold GridSearchCV over max_features, min_samples_split, max_depth and
# n_estimators, run once per target (latitude, longitude), storing the best
# parameter sets in best_param_latitude / best_param_longitude.
"""param_grid = {
'max_features' : ["auto", "sqrt", "log2"],
'min_samples_split' : np.linspace(0.1, 1.0, 5),
'max_depth' : [x for x in range(1,12)],
'n_estimators' : [10, 50, 100]
}
cv_latitude = GridSearchCV(estimator= RandomForestRegressor(), param_grid= param_grid, cv= 3, n_jobs= -1)
cv_latitude.fit(X_train, y_train_latitude)
best_param_latitude = cv_latitude.best_params_
cv_longitude = GridSearchCV(estimator= RandomForestRegressor(), param_grid= param_grid, cv= 3, n_jobs= -1)
cv_longitude.fit(X_train, y_train_longitude)
best_param_longitude = cv_longitude.best_params_"""
# Train one Random Forest per coordinate.
# The forests are seeded with the same value as the train/test split so the
# metrics printed below are reproducible from run to run.
rf_latitude = RandomForestRegressor(random_state=1)
rf_latitude.fit(X_train, y_train_latitude)

rf_longitude = RandomForestRegressor(random_state=1)
rf_longitude.fit(X_train, y_train_longitude)

# Predict both coordinates for the held-out rows.
y_pred_latitude_rf = rf_latitude.predict(X_test)
y_pred_longitude_rf = rf_longitude.predict(X_test)

# Evaluate each model: MAE and MSE are in (squared) degrees, R^2 is unitless.
mae_latitude_rf = mean_absolute_error(y_test_latitude, y_pred_latitude_rf)
mse_latitude_rf = mean_squared_error(y_test_latitude, y_pred_latitude_rf)
r2_latitude_rf = r2_score(y_test_latitude, y_pred_latitude_rf)

mae_longitude_rf = mean_absolute_error(y_test_longitude, y_pred_longitude_rf)
mse_longitude_rf = mean_squared_error(y_test_longitude, y_pred_longitude_rf)
r2_longitude_rf = r2_score(y_test_longitude, y_pred_longitude_rf)

# Print the evaluation metrics
print(f"mean_absolute_error latitude: {mae_latitude_rf}")
print(f"mean_squared_error latitude: {mse_latitude_rf}")
print(f"r2_score latitude: {r2_latitude_rf}\n")
print(f"mean_absolute_error longitude: {mae_longitude_rf}")
print(f"mean_squared_error longitude: {mse_longitude_rf}")
print(f"r2_score longitude: {r2_longitude_rf}")
mean_absolute_error latitude: 0.4878670477201303 mean_squared_error latitude: 0.7030820476046091 r2_score latitude: 0.9791145608578209 mean_absolute_error longitude: 0.7320939413896774 mean_squared_error longitude: 3.7488816792932673 r2_score longitude: 0.988784530005853
# Diagnostic figure, 2 x 2 panels:
#   top row    - real vs. predicted value for each coordinate
#   bottom row - distribution of the residuals (real minus predicted)
error_ml_latitude = y_test_latitude - y_pred_latitude_rf
error_ml_logitude = y_test_longitude - y_pred_longitude_rf

fig, ((ax_lat, ax_lon), (ax_err_lat, ax_err_lon)) = plt.subplots(2, 2, figsize=(18, 10))

ax_lat.scatter(y_test_latitude, y_pred_latitude_rf, color='green', edgecolors='black')
ax_lat.set_xlabel('Real Values latitude')
ax_lat.set_ylabel('Predict Values latitude')
ax_lat.set_title('Scatter plot: Real Values latitude vs Predicted Values latitude')
ax_lat.grid()

ax_lon.scatter(y_test_longitude, y_pred_longitude_rf, color='blue', edgecolors='black')
ax_lon.set_xlabel('Real Values longitude')
ax_lon.set_ylabel('Predict Values longitude')
ax_lon.set_title('Scatter plot: Real Values longitude vs Predicted Values longitude')
ax_lon.grid()

ax_err_lat.hist(error_ml_latitude, bins=50, edgecolor='black', color='green')
ax_err_lat.set_xlabel('Errors Latitude')
ax_err_lat.set_ylabel('Frequence Latitude')
ax_err_lat.set_title('Title: Errors Distribution Latitude')
ax_err_lat.grid()

ax_err_lon.hist(error_ml_logitude, bins=50, edgecolor='black', color='blue')
ax_err_lon.set_xlabel('Errors Longitude')
ax_err_lon.set_ylabel('Frequence Longitude')
ax_err_lon.set_title('Title: Errors Distribution Longitude')
ax_err_lon.grid()

plt.show()
Scatterplot: The scatter plots compare the model's predicted values with the real values and show remarkable accuracy. Virtually all points lie on a straight diagonal line running from the bottom-left corner to the top-right corner, which means the predictions closely match the real latitude and longitude values and the model fits the data very well.
Histogram: The histograms show how frequently each size of error (real value minus predicted value) occurs. Looking at their symmetry, centre, and spread, we observe that both histograms are centered on 0. This suggests that the model is not systematically over- or under-predicting, as most errors cluster around zero.
# Collect the hold-out predictions, labelled with the row index of the test
# split they were computed for (predict() returns a plain array whose order
# follows X_test, which shares its index with y_test_latitude / y_test_longitude).
df_pred_lat = pd.DataFrame({'predict_latitude_reg': y_pred_latitude_rf}, index=y_test_latitude.index)
df_pred_lon = pd.DataFrame({'predict_longitude_reg': y_pred_longitude_rf}, index=y_test_longitude.index)

# Column assignment aligns on the index, so every prediction lands on the row
# it belongs to; rows that were part of the training split stay NaN.
# Without the explicit index the predictions would be written to the rows
# labelled 0..n_test-1 instead, i.e. to unrelated sightings.
# NOTE(review): assumes the row labels of `ufo_cop` are unique - confirm.
ufo_cop['predict_latitude_ufo'] = df_pred_lat['predict_latitude_reg']
ufo_cop['predict_longitude_ufo'] = df_pred_lon['predict_longitude_reg']
# Visualize predictions in interactive graphics
# The green color represents predictions, while the blue color represents real data
# NOTE: the code below is enclosed in a triple-quoted string, so it is not
# executed. It preserves a disabled folium map: centered on the mean
# latitude/longitude, one blue circle marker per row at its real coordinates,
# one green marker per row whose two prediction columns are both non-null,
# saved to "temp_map.html" and embedded through an IFrame.
"""map_center = [ufo_cop['latitude'].mean(), ufo_cop['longitude'].mean()]
mymap = folium.Map(location=map_center, zoom_start=5)
for index, row in ufo_cop.iterrows():
folium.CircleMarker(
location=[row['latitude'], row['longitude']],
radius=3,
color='blue',
fill=True,
fill_color='blue'
).add_to(mymap)
for index, row in ufo_cop.iterrows():
if not pd.isnull(row['predict_latitude_ufo']) and not pd.isnull(row['predict_longitude_ufo']):
folium.CircleMarker(
location=[row['predict_latitude_ufo'], row['predict_longitude_ufo']],
radius=3,
color='green',
fill=True,
fill_color='green'
).add_to(mymap)
map_html = "temp_map.html"
mymap.save(map_html)
IFrame(src=map_html, width='100%', height=500)""" Start coding or generate with AI.
# Static map-like scatter: real sighting coordinates (blue) overlaid with the
# predicted coordinates (green) for the rows that have a prediction.
mask = ufo_cop['predict_latitude_ufo'].notnull() & ufo_cop['predict_longitude_ufo'].notnull()
predicted_rows = ufo_cop.loc[mask]

plt.figure(figsize=(14, 8))
plt.scatter(ufo_cop['longitude'], ufo_cop['latitude'], color='blue', label='Real Data', s=3)
plt.scatter(
    predicted_rows['predict_longitude_ufo'],
    predicted_rows['predict_latitude_ufo'],
    color='green',
    label='Predictions',
    s=3,
)
plt.title('Scatter Plot: Real Data vs Predictions')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Restrict the view to longitudes -175..-25 and latitudes 20..70.
plt.xlim(-175, -25)
plt.ylim(20, 70)
plt.legend()
plt.show()